1 // Written in the D programming language.
2 
3 /**
4    Functions that handle (parallel) IO to disk via HDF5.
5 
6    Copyright: Stefan Frijters 2011-2014
7 
8    License: $(HTTP www.gnu.org/licenses/gpl-3.0.txt, GNU General Public License - version 3 (GPL-3.0)).
9 
10    Authors: Stefan Frijters
11 
12    Macros:
13         TR = <tr>$0</tr>
14         TH = <th>$0</th>
15         TD = <td>$0</td>
16         TABLE = <table border=1 cellpadding=4 cellspacing=0>$0</table>
17 */
18 
19 module dlbc.io.hdf5;
20 
21 public import hdf5.hdf5;
22 
import std.string: toStringz;
24 import std.traits;
25 
26 import dlbc.fields.field;
27 import dlbc.io.checkpoint;
28 import dlbc.io.io;
29 import dlbc.lattice;
30 import dlbc.logging;
31 import dlbc.parallel;
32 import dlbc.range;
33 
34 /**
   Use chunked HDF5 output, with the local block of each process as the chunk size. This is normally not recommended.
36 */
37 @("param") bool writeChunked = false;
38 
39 private {
40   bool hdf5HasStarted = false;
41   immutable defaultDatasetName = "/OutArray";
42   immutable defaultInputFileAName = "input";
43   immutable defaultMetadataGName = "metadata";
44   immutable defaultGlobalsGName = "globals";
45 }
46 
47 /**
48    This function wraps a call to $(D H5open()) to start up HDF5 and reports the version of the HDF5 library.
49 
50    This function should only be called once during a given program run.
51 */
52 void startHDF5() {
  if ( hdf5HasStarted ) {
54     return;
55   }
56 
57   uint majnum, minnum, relnum;
58   herr_t e;
59 
  e = H5open();
  if ( e < 0 ) {
    writeLogF("H5open returned a negative value.");
  }
64 
65   H5get_libversion(&majnum, &minnum, &relnum);
66   H5check_version(majnum, minnum, relnum);
67   writeLogRN("Opened HDF5 v%d.%d.%d.", majnum, minnum, relnum);
68 }
69 
70 /**
71    This function wraps a call to $(D H5close()) to gracefully shut down HDF5.
72 
73    This function should only be called once during a given program run.
74 */
75 void endHDF5() {
76   H5close();
77   writeLogRN("Closed HDF5.");
78 }
79 
80 /**
81    This template function returns an HDF5 data type identifier based on the type T.
82    If T is an array, the base type is returned.
83 
84    Params:
85      T = a type
86 
87    Returns: the corresponding HDF5 data type identifier
88 */
89 hid_t hdf5Typeof(T)() @property {
90   import dlbc.range;
91   import std.traits;
92   static if ( isArray!T ) {
93     return hdf5Typeof!(BaseElementType!T);
94   }
95   else {
96     static if ( is(T : int) ) {
97       return H5T_NATIVE_INT;
98     }
99     else static if ( is(T == double) ) {
100       return H5T_NATIVE_DOUBLE;
101     }
102     else {
103       static assert(0, "Datatype not implemented for HDF5.");
104     }
105   }
106 }
107 
108 /**
109    This template function returns the length of a static array of type T or 1 if
110    the type is not an array.
111 
112    Params:
113      T = a type
114 
115    Returns: the corresponding length
116 */
117 hsize_t hdf5LengthOf(T)() @property {
118   import dlbc.range;
119   return LengthOf!T;
120 }
121 
122 /**
123    Write a field to disk using HDF5.
124 
125    Params:
126      field = field to be written
127      name = name of the field, to be used in the file name
128      time = current timestep
129      isCheckpoint = whether this is checkpoint-related (different file name, write globals)
130 */
131 void dumpFieldHDF5(T)(ref T field, const string name, const uint time = 0, const bool isCheckpoint = false) if ( isField!T ) {
132   hsize_t[] dimsg;
133   hsize_t[] dimsl;
134   hsize_t[] count;
135   hsize_t[] stride;
136   hsize_t[] block;
137   hsize_t[] start;
138   hsize_t[] arrstart;
139 
140   immutable type_id = hdf5Typeof!(T.type);
141   immutable typeLen = hdf5LengthOf!(T.type);
142 
143   if ( field.size <= 1 ) return;
144 
145   auto dim = field.dimensions;
146 
147   static if ( field.dimensions == 3 ) {
148     if ( typeLen > 1 ) {
149       dim++; // One more dimension to store the vector component.
150       dimsg = [ gn[0], gn[1], gn[2], typeLen ];
151       dimsl = [ field.nH[0], field.nH[1], field.nH[2], typeLen ];
152       count = [ 1, 1, 1, 1 ];
153       stride = [ 1, 1, 1, 1 ];
154       block = [ field.n[0], field.n[1], field.n[2], typeLen ];
155       start = [ M.c[0]*field.n[0], M.c[1]*field.n[1], M.c[2]*field.n[2], 0 ];
156       arrstart = [ field.haloSize, field.haloSize, field.haloSize, 0 ];
157     }
158     else {
159       dimsg = [ gn[0], gn[1], gn[2] ];
160       dimsl = [ field.nH[0], field.nH[1], field.nH[2] ];
161       count = [ 1, 1, 1 ];
162       stride = [ 1, 1, 1 ];
163       block = [ field.n[0], field.n[1], field.n[2] ];
164       start = [ M.c[0]*field.n[0], M.c[1]*field.n[1], M.c[2]*field.n[2] ];
165       arrstart = [ field.haloSize, field.haloSize, field.haloSize ];
166     }
167   }
168   else static if ( field.dimensions == 2 ) {
169     if ( typeLen > 1 ) {
170       dim++; // One more dimension to store the vector component.
171       dimsg = [ gn[0], gn[1], typeLen ];
172       dimsl = [ field.nH[0], field.nH[1], typeLen ];
173       count = [ 1, 1, 1 ];
174       stride = [ 1, 1, 1 ];
175       block = [ field.n[0], field.n[1], typeLen ];
176       start = [ M.c[0]*field.n[0], M.c[1]*field.n[1], 0 ];
177       arrstart = [ field.haloSize, field.haloSize, 0 ];
178     }
179     else {
180       dimsg = [ gn[0], gn[1] ];
181       dimsl = [ field.nH[0], field.nH[1] ];
182       count = [ 1, 1 ];
183       stride = [ 1, 1 ];
184       block = [ field.n[0], field.n[1] ];
185       start = [ M.c[0]*field.n[0], M.c[1]*field.n[1] ];
186       arrstart = [ field.haloSize, field.haloSize ];
187     }
188   }
189   else static if ( field.dimensions == 1 ) {
190     if ( typeLen > 1 ) {
191       dim++; // One more dimension to store the vector component.
192       dimsg = [ gn[0], typeLen ];
193       dimsl = [ field.nH[0], typeLen ];
194       count = [ 1, 1 ];
195       stride = [ 1, 1 ];
196       block = [ field.n[0], typeLen ];
197       start = [ M.c[0]*field.n[0], 0 ];
198       arrstart = [ field.haloSize, 0 ];
199     }
200     else {
201       dimsg = [ gn[0] ];
202       dimsl = [ field.nH[0] ];
203       count = [ 1 ];
204       stride = [ 1 ];
205       block = [ field.n[0] ];
206       start = [ M.c[0]*field.n[0] ];
207       arrstart = [ field.haloSize ];
208     }
209   }
210   else {
211     static assert(0, "dumpFieldHDF5 not implemented for dimensions > 3.");
212   }
213 
214   MPI_Info info = MPI_INFO_NULL;
215 
216   string fileNameString;
217   if ( isCheckpoint ) {
218     fileNameString = makeFilenameCpOutput!(FileFormat.HDF5)(name, time);
219     writeLogRI("HDF writing to checkpoint file '%s'.", fileNameString);
220   }
221   else {
222     fileNameString = makeFilenameOutput!(FileFormat.HDF5)(name, time);
223     writeLogRI("HDF writing to file '%s'.", fileNameString);
224   }
225   auto fileName = fileNameString.toStringz();
226 
227   // if (hdf_use_ibm_largeblock_io) then
228   //   if (dbg_report_hdf5) call log_msg("HDF using IBM_largeblock_io")
229   //   call MPI_Info_create(info, err)
230   //   call MPI_Info_set(info, "IBM_largeblock_io", "true", err)
231   // end if
232 
233   // Create the file collectively.
234   auto fapl_id = H5Pcreate(H5P_FILE_ACCESS);
235   H5Pset_fapl_mpio(fapl_id, M.comm, info);
236   auto file_id = H5Fcreate(fileName, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
237   H5Pclose(fapl_id);
238 
239   // Create the data spaces for the dataset, using global and local size
240   // (including halo!), respectively.
241   auto filespace = H5Screate_simple(dim, dimsg.ptr, null);
242   auto memspace = H5Screate_simple(dim, dimsl.ptr, null);
243 
244   hid_t dcpl_id;
245   if ( writeChunked ) {
246     dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
247     H5Pset_chunk(dcpl_id, dim, block.ptr);
248   }
249   else {
250     dcpl_id = H5P_DEFAULT;
251   }
252 
253   auto datasetName = defaultDatasetName.toStringz();
254   auto dataset_id = H5Dcreate2(file_id, datasetName, type_id, filespace, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
255   H5Sclose(filespace);
  // H5P_DEFAULT must not be passed to H5Pclose, so only close the
  // property list if we actually created one.
  if ( writeChunked ) {
    H5Pclose(dcpl_id);
  }
257 
258   filespace = H5Dget_space(dataset_id);
259   // In the filespace, we have an offset to make sure we write in the correct chunk.
260   H5Sselect_hyperslab(filespace, H5S_seloper_t.H5S_SELECT_SET, start.ptr, stride.ptr, count.ptr, block.ptr);
261   // In the memspace, we cut off the halo region.
262   H5Sselect_hyperslab(memspace, H5S_seloper_t.H5S_SELECT_SET, arrstart.ptr, stride.ptr, count.ptr, block.ptr);
263 
264   // Set up for collective IO.
265   auto dxpl_id = H5Pcreate(H5P_DATASET_XFER);
266   H5Pset_dxpl_mpio(dxpl_id, H5FD_mpio_xfer_t.H5FD_MPIO_COLLECTIVE);
267 
268   H5Dwrite(dataset_id, type_id, memspace, filespace, dxpl_id, field.arr._data.ptr);
269 
270   // Close all remaining handles.
271   H5Sclose(filespace);
272   H5Sclose(memspace);
273   H5Dclose(dataset_id);
274   H5Pclose(dxpl_id);
275   H5Fclose(file_id);
276 
277   // Only root writes the attributes
278   if ( M.isRoot() ) {
279     file_id = H5Fopen(fileName, H5F_ACC_RDWR, H5P_DEFAULT);
280     auto root_id = H5Gopen2(file_id, "/", H5P_DEFAULT);
281     // Write the input file
282     dumpInputFileAttributes(root_id);
283     // Write the metadata
284     dumpMetadata(root_id);
285     H5Gclose(root_id);
286     H5Fclose(file_id);
287     // Write the global state
288     if ( isCheckpoint ) {
289       dumpCheckpointGlobals(fileName);
290     }
291   }
292 }
293 
294 /**
295    Write metadata as attributes.
296 */
297 void dumpMetadata(const hid_t root_id) {
298   import dlbc.revision;
299   auto group_id = H5Gcreate2(root_id, "metadata", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
300   dumpAttributeHDF5(revisionHash, "revisionHash", group_id);
301   dumpAttributeHDF5(revisionDesc, "revisionDesc", group_id);
302   dumpAttributeHDF5(revisionBranch, "revisionBranch", group_id);
303   dumpAttributeHDF5(revisionChanged, "revisionChanged", group_id);
304   dumpAttributeHDF5(revisionChanges, "revisionChanges", group_id);
305   H5Gclose(group_id);
306 }
307 
308 /**
309    Read a field from disk using HDF5.
310 
311    Params:
312      field = field to be read
313      fileNameString = name of the file to be read from
     isCheckpoint = whether this is checkpoint-related (reads checkpoint globals)
315 */
316 void readFieldHDF5(T)(ref T field, const string fileNameString, const bool isCheckpoint = false) if ( isField!T ) {
317   hsize_t[] dimsg;
318   hsize_t[] dimsl;
319   hsize_t[] count;
320   hsize_t[] stride;
321   hsize_t[] block;
322   hsize_t[] start;
323   hsize_t[] arrstart;
324 
325   immutable type_id = hdf5Typeof!(T.type);
326   immutable typeLen = hdf5LengthOf!(T.type);
327 
328   if ( field.size <= 1 ) return;
329 
330   auto dim = field.dimensions;
331 
332   static if ( field.dimensions == 3 ) {
333     if ( typeLen > 1 ) {
334       dim++; // One more dimension to store the vector component.
335       dimsg = [ gn[0], gn[1], gn[2], typeLen ];
336       dimsl = [ field.nH[0], field.nH[1], field.nH[2], typeLen ];
337       count = [ 1, 1, 1, 1 ];
338       stride = [ 1, 1, 1, 1 ];
339       block = [ field.n[0], field.n[1], field.n[2], typeLen ];
340       start = [ M.c[0]*field.n[0], M.c[1]*field.n[1], M.c[2]*field.n[2], 0 ];
341       arrstart = [ field.haloSize, field.haloSize, field.haloSize, 0 ];
342     }
343     else {
344       dimsg = [ gn[0], gn[1], gn[2] ];
345       dimsl = [ field.nH[0], field.nH[1], field.nH[2] ];
346       count = [ 1, 1, 1 ];
347       stride = [ 1, 1, 1 ];
348       block = [ field.n[0], field.n[1], field.n[2] ];
349       start = [ M.c[0]*field.n[0], M.c[1]*field.n[1], M.c[2]*field.n[2] ];
350       arrstart = [ field.haloSize, field.haloSize, field.haloSize ];
351     }
352   }
353   else static if ( field.dimensions == 2 ) {
354     if ( typeLen > 1 ) {
355       dim++; // One more dimension to store the vector component.
356       dimsg = [ gn[0], gn[1], typeLen ];
357       dimsl = [ field.nH[0], field.nH[1], typeLen ];
358       count = [ 1, 1, 1 ];
359       stride = [ 1, 1, 1 ];
360       block = [ field.n[0], field.n[1], typeLen ];
361       start = [ M.c[0]*field.n[0], M.c[1]*field.n[1], 0 ];
362       arrstart = [ field.haloSize, field.haloSize, 0 ];
363     }
364     else {
365       dimsg = [ gn[0], gn[1] ];
366       dimsl = [ field.nH[0], field.nH[1] ];
367       count = [ 1, 1 ];
368       stride = [ 1, 1 ];
369       block = [ field.n[0], field.n[1] ];
370       start = [ M.c[0]*field.n[0], M.c[1]*field.n[1] ];
371       arrstart = [ field.haloSize, field.haloSize ];
372     }
373   }
374   else static if ( field.dimensions == 1 ) {
375     if ( typeLen > 1 ) {
376       dim++; // One more dimension to store the vector component.
377       dimsg = [ gn[0], typeLen ];
378       dimsl = [ field.nH[0], typeLen ];
379       count = [ 1, 1 ];
380       stride = [ 1, 1 ];
381       block = [ field.n[0], typeLen ];
382       start = [ M.c[0]*field.n[0], 0 ];
383       arrstart = [ field.haloSize, 0 ];
384     }
385     else {
386       dimsg = [ gn[0] ];
387       dimsl = [ field.nH[0] ];
388       count = [ 1 ];
389       stride = [ 1 ];
390       block = [ field.n[0] ];
391       start = [ M.c[0]*field.n[0] ];
392       arrstart = [ field.haloSize ];
393     }
394   }
395   else {
396     static assert(0, "readFieldHDF5 not implemented for dimensions > 3.");
397   }
398 
399   MPI_Info info = MPI_INFO_NULL;
400 
401   auto fileName = fileNameString.toStringz();
402 
403   writeLogRI("HDF reading from file '%s'.", fileNameString);
404 
405   // if (hdf_use_ibm_largeblock_io) then
406   //   if (dbg_report_hdf5) call log_msg("HDF using IBM_largeblock_io")
407   //   call MPI_Info_create(info, err)
408   //   call MPI_Info_set(info, "IBM_largeblock_io", "true", err)
409   // end if
410 
411   // Create the file collectively.
412   auto fapl_id = H5Pcreate(H5P_FILE_ACCESS);
413   H5Pset_fapl_mpio(fapl_id, M.comm, info);
414   auto file_id = H5Fopen(fileName, H5F_ACC_RDONLY, fapl_id);
415   H5Pclose(fapl_id);
416 
417   auto datasetName = defaultDatasetName.toStringz();
418   auto dataset_id = H5Dopen2(file_id, datasetName, H5P_DEFAULT);
419 
420   auto filespace = H5Dget_space(dataset_id);
421 
  // In the filespace, we have an offset to make sure we read from the correct chunk.
423   H5Sselect_hyperslab(filespace, H5S_seloper_t.H5S_SELECT_SET, start.ptr, stride.ptr, count.ptr, block.ptr);
424   // In the memspace, we cut off the halo region.
425   auto memspace = H5Screate_simple(dim, dimsl.ptr, null);
426   H5Sselect_hyperslab(memspace, H5S_seloper_t.H5S_SELECT_SET, arrstart.ptr, stride.ptr, count.ptr, block.ptr);
427 
428   // Set up for collective IO.
429   auto dxpl_id = H5Pcreate(H5P_DATASET_XFER);
430   H5Pset_dxpl_mpio(dxpl_id, H5FD_mpio_xfer_t.H5FD_MPIO_COLLECTIVE);
431 
432   auto e = H5Dread(dataset_id, type_id, memspace, filespace, dxpl_id, field.arr._data.ptr);
  if ( e < 0 ) {
    writeLogF("Unable to read dataset from file '%s'.", fileNameString);
  }
436 
437   // Close all remaining handles.
438   H5Sclose(filespace);
439   H5Sclose(memspace);
440   H5Dclose(dataset_id);
441   H5Pclose(dxpl_id);
442   H5Fclose(file_id);
443 
  // Only root reads the checkpoint globals; afterwards they are broadcast to all ranks.
445   if ( isCheckpoint ) {
446     if ( M.isRoot() ) {
447       readCheckpointGlobals(fileName);
448     }
449     broadcastCheckpointGlobals();
450   }
451 }
452 
453 /**
454    Dump a single piece of data as an attribute to an HDF5 file.
455 
456    Params:
457      data = data to write
458      name = name of the attribute
459      loc_id = id of the location to attach to
460 */
461 void dumpAttributeHDF5(T)(const T data, const string name, hid_t loc_id) {
462   auto attrname = name.toStringz();
463   hid_t sid, aid, type;
464   static if ( is (T == string ) ) {
465     hsize_t[] length = [ 1 ];
466     auto attrdata = data.toStringz();
467     type = H5Tcopy (H5T_C_S1);
468     H5Tset_size (type, H5T_VARIABLE);
469     sid = H5Screate_simple(1, length.ptr, null);
470     aid = H5Acreate2(loc_id, attrname, type, sid, H5P_DEFAULT, H5P_DEFAULT);
471     H5Awrite(aid, type, &attrdata);
472     H5Tclose(type);
473   }
474   else {
475     hsize_t[] length = [ 1 ];
476     sid = H5Screate_simple(1, length.ptr, null);
477     aid = H5Acreate2(loc_id, attrname, hdf5Typeof!T, sid, H5P_DEFAULT, H5P_DEFAULT);
478     H5Awrite(aid, hdf5Typeof!T, &data);
479   }
480   H5Aclose(aid);
481   H5Sclose(sid);
482 }
483 
484 /**
485    Read a single piece of data from an attribute of an HDF5 file.
486 
487    Params:
488      name = name of the attribute
489      loc_id = id of the group
490 */
491 T readAttributeHDF5(T)(const string name, hid_t loc_id) {
492   import std.conv: to;
493 
  auto attrname = name.toStringz();
496   static if ( is (T == string ) ) {
    auto att = H5Aopen_by_name(loc_id, ".", attrname, H5P_DEFAULT, H5P_DEFAULT);
    auto ftype = H5Aget_type(att);
    auto dataspace = H5Aget_space(att);

    hsize_t[] dims;
    dims.length = 1;
    H5Sget_simple_extent_dims(dataspace, dims.ptr, null);

    char*[] chars;
    chars.length = to!size_t(dims[0]);
    // Read the variable-length string using its native in-memory type.
    auto type = H5Tget_native_type(ftype, H5T_direction_t.H5T_DIR_ASCEND);
    H5Aread(att, type, chars.ptr);
512 
513     H5Sclose(dataspace);
514     H5Tclose(ftype);
515     H5Aclose(att);
516     H5Tclose(type);
517     return to!string(chars[0]);
518   }
  else {
    T result;
    auto aid = H5Aopen_by_name(loc_id, ".", attrname, H5P_DEFAULT, H5P_DEFAULT);
    H5Aread(aid, hdf5Typeof!T, &result);
    H5Aclose(aid);
    return result;
  }
529 }
530 
531 /**
532    Dump the contents of the input file as an attribute.
533 
534    Params:
     loc_id = id of the location to attach to
536 */
537 void dumpInputFileAttributes(hid_t loc_id) {
538   import dlbc.parameters: inputFileData;
539   hid_t sid, aid, type;
540   auto attrname = defaultInputFileAName.toStringz();
541   hsize_t[] length = [ inputFileData.length ];
542 
543   immutable(char)*[] stringz;
544   stringz.length = inputFileData.length;
545   foreach(immutable i, e; inputFileData) {
546     stringz[i] = e.toStringz();
547   }
548   type = H5Tcopy(H5T_C_S1);
549   H5Tset_size(type, H5T_VARIABLE);
550   sid = H5Screate_simple(1, length.ptr, null);
551   aid = H5Acreate2(loc_id, attrname, type, sid, H5P_DEFAULT, H5P_DEFAULT);
552   H5Awrite(aid, type, stringz.ptr);
553   H5Tclose(type);
554   H5Aclose(aid);
555   H5Sclose(sid);
556 }
557 
558 /**
559    Read the contents of the input file attribute into strings.
560 
561    Params:
562      fileNameString = name of the file to read from
563 
564    Returns: array of strings corresponding to lines of the input file.
565 */
566 string[] readInputFileAttributes(const string fileNameString) {
  import std.conv: to;
569 
  auto attrname = defaultInputFileAName.toStringz();
  auto fileName = fileNameString.toStringz();
573 
574   auto file = H5Fopen(fileName, H5F_ACC_RDONLY, H5P_DEFAULT);
  auto root = H5Gopen2(file, "/", H5P_DEFAULT);
  auto att = H5Aopen_by_name(root, ".", attrname, H5P_DEFAULT, H5P_DEFAULT);
  auto ftype = H5Aget_type(att);
  auto dataspace = H5Aget_space(att);
582 
583   hsize_t[] dims;
584   dims.length = 1;
585   H5Sget_simple_extent_dims(dataspace, dims.ptr, null);
586 
587   char*[] chars;
588   chars.length = to!size_t(dims[0]);
  // Read the variable-length strings using their native in-memory type.
  auto type = H5Tget_native_type(ftype, H5T_direction_t.H5T_DIR_ASCEND);
590   H5Aread(att, type, chars.ptr);
591 
592   H5Sclose(dataspace);
593   H5Tclose(ftype);
594   H5Aclose(att);
595   H5Gclose(root);
596   H5Tclose(type);
597   H5Fclose(file);
598 
599   string[] strings;
600   foreach(e; chars) {
601     strings ~= to!string(e);
602   }
603 
604   return strings;
605 }
606